*
*
* garch-mc-norm-experiment2
* 4/16/24
*
* -----------------------------------------------------------------------

cd "~/Dropbox/Benton-Jordan-Philips/final-JOP-replication"

* run garch sim program:
do "scripts-programs/synthgarch.ado"



* -----------------------------------------------------------------------------
* -----------------------------------------------------------------------------
* 	---------- SCENARIO 2 (T=1000, normal dist resids) ----------------------
* -----------------------------------------------------------------------------
/*	ARCH = 0.1
	GARCH = 0.5
	T = 1000
	beta_x = .5
	beta_z = -1
	constant = 0.5
	phi = 0.25
	x is continuous, z is dichotomous, epsilon is normal
	burnin = 250
	sims = 100
	scenname = scenario1
*/

loc dgp_sims = 500 // how many draws from the DGP are we taking?
loc rep_sims = 500 // how many reps OF a dgp_sim do we want to take?

* -----------------------------------------------------------------------
* -->	STEP 1: GENERATE SERIES. 
* this is done for all steps below
synthgarch, time(1000) garchparam(0.5) ///
 archparam(0.1) xparam(.5) zparam(-1) consparam(0.5) ///
 phiparam(0.25) burnin(250) seedz(43055029) sims(`dgp_sims') ///
 scenname(scenario2)
* -----------------------------------------------------------------------

* -----------------------------------------------------------------------
* -->	STEP 2: RUN BASE GARCH MODEL
 * create predictions and save all M of these in order to get a 95% CI that we will compare with our bs techniques
use "dgp_scenario2.dta", clear
set seed 2508564
tsset t
set obs 2000 // ensure we can store up to M results
* create blank variables to store the params:
gen normalitypval = .
gen garchterm = .
gen archterm = . 
gen hetx = .
gen hetz = .
gen constant = .

* now estimate M GARCH models, saving coefficients from HET eq:
forv i = 1/`dgp_sims' {
	* run garch
	cap arch y_`i' l.y_`i' x_`i' z_`i', arch(1) garch(1) het(x_`i' z_`i')
	
	if "`e(converged)'" == "1" {
		* a. perform a residual test (e.g., normality met?).
		predict res, residual
		sktest res
		replace normalitypval = r(p_chi2) in `i'
		drop res
	
		* store coefs:
		replace archterm = e(b)[1,8] in `i' // not sure if we need this
		replace garchterm = e(b)[1,9] in `i'
		replace hetx = e(b)[1,5] in `i'
		replace hetz = e(b)[1,6] in `i'
		replace constant = e(b)[1,7] in `i'
	}
	else {
		* do nothing, no convergence
	}
	
}

* now with the M sims, create stable predictions. We'll set X = 1 and Z = 1, no ARCH effects and will burn-in to get a stable prediction. I don't see a need to do anything different than our generating new variables for each burnin since we're doing all M at once.

* we don't care about intermediate values so we just want a stable prediction say 100 time points in:
gen preds = exp(constant + hetx*1 + hetz*1) // init guess
forv i = 1/100 {
	replace preds = garchterm*preds + exp(constant + hetx*1 + hetz*1) + 0*archterm // replace w/ past value to achieve stable GARCH prediction
}
su preds, det // so this is our 95% CI and ave prediction. we'll make UL and LL anyway along w/ median
gen pred_sd = r(sd)
_pctile preds, p(2.5, 50, 97.5)
gen pred_ll = r(r1)
gen pred_median = r(r2)
gen pred_ul = r(r3)

* we only need to keep coefs and prediction
keep normalitypval - pred_ul
qui su normalitypval
keep in 1/`r(N)' // only keep actual # of sims
foreach var of varlist normalitypval - pred_ul {
	rename `var' `var'_BASE
}
gen sims = _n
compress
save "data/dgp_scenario2_BASEPRED.dta", replace
* -----------------------------------------------------------------------

* -----------------------------------------------------------------------
* -->	STEP 3: PARAMETRIC BOOTSTRAP
* technically we could just use the models above. Also, unlike above we're creating 1000 predictions PER SIM. I think to keep things feasible for each sim we should record the following in long form (so length of this dataset is length of sims, not length of each bootstrap)
* estimate GARCH model on dataset m. take 1000 P-bs draws
* record ave beta's and SD. 
* use 1000 P-bs draws to create predictions. Take median and CIs of this. record them.
* thus we'll have an M-long matrix where each column is bar(beta), sd(beta), median(pred), ul(pred), ll(pred)
* and I think since we're clearing in order to use corr2data we should keep it all in a matrix
mat storage = J(`dgp_sims',14,.) // cols are:
* 1. estimated B-bar, ARCH term
* 2. estimated B-bar, GARCh term
* 3. estimated B-bar, Het X term
* 4. estimated B-bar, Het Z term
* 5. estimated B-bar, Het constant
* 6. estimated B-sd, ARCH term
* 7. estimated B-sd, GARCh term
* 8. estimated B-sd, Het X term
* 9. estimated B-sd, Het Z term
* 10. estimated B-sd, Het constant
* 11. median prediction
* 12. ll prediction
* 13. ul prediction
* 14. sd prediction

use "dgp_scenario2.dta", clear
set seed 987789
tsset t
set obs 2000 // ensure we can store up to M results


* now estimate M GARCH models, saving coefficients from HET eq:
forv i = 1/`dgp_sims' {
	* run garch
	cap arch y_`i' l.y_`i' x_`i' z_`i', arch(1) garch(1) het(x_`i' z_`i')
	
	if "`e(converged)'" == "1" {
		mat B = e(b)
		mat VCV = e(V)
	
		preserve
		clear
		corr2data junk1 junk2 junk3 junk4 hetx hetz constant archterm garchterm, n(`rep_sims') means(B) cov(VCV)
	
		* summarize and place means/sd
		qui su hetx
		mat storage[`i',3] = r(mean)
		mat storage[`i',8] = r(sd)
	
		qui su hetz
		mat storage[`i',4] = r(mean)
		mat storage[`i',9] = r(sd)
	
		qui su constant
		mat storage[`i',5] = r(mean)
		mat storage[`i',10] = r(sd)
	
		qui su archterm
		mat storage[`i',1] = r(mean)
		mat storage[`i',6] = r(sd)
	
		qui su garchterm
		mat storage[`i',2] = r(mean)
		mat storage[`i',7] = r(sd)
	
		* now create preds
		gen preds = exp(constant + hetx*1 + hetz*1) // init guess
		forv j = 1/100 {
			replace preds = garchterm*preds + exp(constant + hetx*1 + hetz*1) + 0*archterm // replace w/ past value to achieve stable GARCH prediction
		}
		* and get med and 95% CIs
		_pctile preds, p(2.5, 50, 97.5)
		mat storage[`i',12] = r(r1)
		mat storage[`i',11] = r(r2)
		mat storage[`i',13] = r(r3)
		su preds
		mat storage[`i',14] = r(sd)
		restore	
	}
	else {
		* do nothing. no convergence
	}
}
mat list storage
clear
svmat storage
* 1. estimated B-bar, ARCH term
rename storage1 archterm_mean
* 2. estimated B-bar, GARCh term
rename storage2 garchterm_mean
* 3. estimated B-bar, Het X term
rename storage3 hetx_mean
* 4. estimated B-bar, Het Z term
rename storage4 hetz_mean
* 5. estimated B-bar, Het constant
rename storage5 constant_mean
* 6. estimated B-sd, ARCH term
rename storage6 archterm_sd
* 7. estimated B-sd, GARCh term
rename storage7 garchterm_sd
* 8. estimated B-sd, Het X term
rename storage8 hetx_sd
* 9. estimated B-sd, Het Z term
rename storage9 hetz_sd
* 10. estimated B-sd, Het constant
rename storage10 constant_sd
* 11. median prediction
rename storage11 pred_median_PBS
* 12. ll prediction
rename storage12 pred_ll_PBS
* 13. ul prediction
rename storage13 pred_ul_PBS
* 14. sd prediction
rename storage14 pred_sd
gen sims = _n
compress
save "data/dgp_scenario2_PBSPRED.dta", replace
* -----------------------------------------------------------------------


* -----------------------------------------------------------------------
* -->	STEP 4: MAXIMUM ENTROPY BOOTSTRAP
* looks like we might be able to directly run R through stata: https://julianreif.com/rscript/
* -----------------------------------------------------------------------
mat storage = J(`dgp_sims',14,.) // cols are:
* 1. estimated B-bar, ARCH term
* 2. estimated B-bar, GARCh term
* 3. estimated B-bar, Het X term
* 4. estimated B-bar, Het Z term
* 5. estimated B-bar, Het constant
* 6. estimated B-sd, ARCH term
* 7. estimated B-sd, GARCh term
* 8. estimated B-sd, Het X term
* 9. estimated B-sd, Het Z term
* 10. estimated B-sd, Het constant
* 11. median prediction
* 12. ll prediction
* 13. ul prediction
* 14. sd prediction


set seed 10023
* Run rscript.
* ARGS are: number in the boot (e.g., y_`1' boots y_1), dataset to boot from, length of T, number of reps
forv i = 1/`dgp_sims' {
	rscript using "scripts-programs/me-call.R", args("`i'" "dgp_scenario2.dta" "1000" "`rep_sims'")

	* now load in the generated data:
	import delimited "me-draws.csv", clear 
	tsset time
	set obs 2000
	* create empty slots to hold betas:
	foreach name in hetx hetz constant archterm garchterm {
		gen `name' = .
	}

	* now for each of the synthetic Y's created for this particular y_`i', we'll run b GARCH models (the OG vars are dynamically updated via rscript)
	forv b = 1/`rep_sims' {
		cap arch meboot_y_`b' l.meboot_y_`b' og_x og_z, arch(1) garch(1) het(og_x og_z)
	
		if "`e(converged)'" == "1" {
			* store betas
			replace hetx = _b[HET:og_x] in `b'
			replace hetz = _b[HET:og_z] in `b'
			replace constant = _b[HET:_cons] in `b'
			replace archterm = _b[ARCH:L.arch] in `b'
			replace garchterm = _b[ARCH:L.garch] in `b'
		}
		else {
			* do nothing. No convergence
		}
	}
	
	* summarize and place means/sd
	qui su hetx
	mat storage[`i',3] = r(mean)
	mat storage[`i',8] = r(sd)
	
	qui su hetz
	mat storage[`i',4] = r(mean)
	mat storage[`i',9] = r(sd)
	
	qui su constant
	mat storage[`i',5] = r(mean)
	mat storage[`i',10] = r(sd)
	
	qui su archterm
	mat storage[`i',1] = r(mean)
	mat storage[`i',6] = r(sd)
	
	qui su garchterm
	mat storage[`i',2] = r(mean)
	mat storage[`i',7] = r(sd)
	
	* now create preds
	gen preds = exp(constant + hetx*1 + hetz*1) // init guess
	forv j = 1/100 {
		replace preds = garchterm*preds + exp(constant + hetx*1 + hetz*1) + 0*archterm // replace w/ past value to achieve stable GARCH prediction
	}
	* and get med and 95% CIs
	_pctile preds, p(2.5, 50, 97.5)
	mat storage[`i',12] = r(r1)
	mat storage[`i',11] = r(r2)
	mat storage[`i',13] = r(r3)
	su preds
	mat storage[`i',14] = r(sd)
	
}

mat list storage
clear
svmat storage
* 1. estimated B-bar, ARCH term
rename storage1 archterm_mean
* 2. estimated B-bar, GARCh term
rename storage2 garchterm_mean
* 3. estimated B-bar, Het X term
rename storage3 hetx_mean
* 4. estimated B-bar, Het Z term
rename storage4 hetz_mean
* 5. estimated B-bar, Het constant
rename storage5 constant_mean
* 6. estimated B-sd, ARCH term
rename storage6 archterm_sd
* 7. estimated B-sd, GARCh term
rename storage7 garchterm_sd
* 8. estimated B-sd, Het X term
rename storage8 hetx_sd
* 9. estimated B-sd, Het Z term
rename storage9 hetz_sd
* 10. estimated B-sd, Het constant
rename storage10 constant_sd
* 11. median prediction
rename storage11 pred_median_PBS
* 12. ll prediction
rename storage12 pred_ll_PBS
* 13. ul prediction
rename storage13 pred_ul_PBS
* 14. sd prediction
rename storage14 pred_sd
gen sims = _n
compress
save "data/dgp_scenario2_MEPRED.dta", replace


* -----------------------------------------------------------------------
* -->	STEP 5: RESIDUAL BOOTSTRAP
* run residual bootstrap program
do "scripts-programs/bootr2.ado" // run resid bootstrap program
do "scripts-programs/bootrone.ado" // run resid bootstrap program
loc dgp_sims = 500 // how many draws from the DGP are we taking?
loc rep_sims = 500 // how many reps OF a dgp_sim do we want to take?

mat storage = J(`dgp_sims',14,.) // cols are:
* 1. estimated B-bar, ARCH term
* 2. estimated B-bar, GARCh term
* 3. estimated B-bar, Het X term
* 4. estimated B-bar, Het Z term
* 5. estimated B-bar, Het constant
* 6. estimated B-sd, ARCH term
* 7. estimated B-sd, GARCh term
* 8. estimated B-sd, Het X term
* 9. estimated B-sd, Het Z term
* 10. estimated B-sd, Het constant
* 11. median prediction
* 12. ll prediction
* 13. ul prediction
* 14. sd prediction

use "dgp_scenario2.dta", clear
tsset t
set seed 68004
set obs 2000 // just to make sure we have enough room

gen _sigma2 = . // for storing sigma2. It gets replaced every loop 
forv b = 1/`rep_sims' { // blank vector of bs created y 
	gen y_b`b' = .
}
gen preds = . // for storing predictions

* create empty slots to hold estimated betas:
	foreach name in hetx hetz constant archterm garchterm {
		gen `name' = .
}

forv i = 1/`dgp_sims' {
	* Estimate GARCH model
	arch y_`i' l.y_`i' x_`i' z_`i', arch(1) garch(1) het(x_`i' z_`i')
	cap drop nu
	cap drop sigma2
	cap drop epsilon
	predict nu, resid // nu_t = \sigma_t\epsilon_t
	predict sigma2, variance // sigma^2_t
	gen epsilon = nu/sqrt(sigma2) // rescale, nu_t/\sigma_t = \epsilon_t
	su epsilon, meanonly // mean-center
	replace epsilon = epsilon-r(mean)
	
	* loop across bootstraps:
	forv b = 1/`rep_sims' {
		* Create initial values of y and sigma2 at t=0. Previously we were trying a burn-in process but that meant we lost an additional observation
		
		* draw residuals:
		bootrone, resid(epsilon) // this will be the t=0 error which we need for sigma2
		loc residvalue = r(residvalue) // only saving as loc b/c of repeat call below
		* resid for t=1 error for y
		bootrone, resid(epsilon) 
		loc residvaluey = r(residvalue)

		
		* independently from resid, draw bs at same time for y and sigma2
		bootrone, yvalue(y_`i') sigma2value(sigma2)
		
		* init sigma2 at t=1
		replace _sigma2 = _b[ARCH:L.arch]*`residvalue'^2*r(sigma2init) + ///
			_b[ARCH:L.garch]*r(sigma2init) + exp(_b[HET:_cons] + ///
			_b[HET:x_`i']*x_`i' + _b[HET:z_`i']*z_`i') in 1
		* also sigma2 at t=2 since we need `residvaluey'
		replace _sigma2 = _b[ARCH:L.arch]*`residvaluey'^2*l._sigma2 + ///
			_b[ARCH:L.garch]*l._sigma2 + exp(_b[HET:_cons] + ///
			_b[HET:x_`i']*x_`i' + _b[HET:z_`i']*z_`i') in 2
			
		* init y at t=1
		replace y_b`b' = _b[y_`i':_cons] + _b[y_`i':L.y_`i']*r(yinit) + ///
			_b[y_`i':x_`i']*x_`i' + _b[y_`i':z_`i']*z_`i' + ///
			sqrt(_sigma2)*`residvaluey' in 1
		
		* pull bootstrap residuals for t=2/T
		cap drop boot_e
		bootr2, old(epsilon) newname(boot_e) minT(2)
		
		* now recursively generate sigma2 and y for 2/T
		replace _sigma2 = _b[ARCH:L.arch]*l.boot_e^2*l._sigma2 + ///
			_b[ARCH:L.garch]*l._sigma2 + exp(_b[HET:_cons] + ///
			_b[HET:x_`i']*x_`i' + _b[HET:z_`i']*z_`i') in 3/L
			
		replace y_b`b' = _b[y_`i':_cons] + _b[y_`i':L.y_`i']*l.y_b`b' + ///
			_b[y_`i':x_`i']*x_`i' + _b[y_`i':z_`i']*z_`i' + sqrt(_sigma2)*boot_e in 2/L
	}
	
	* now that everything's generated for y_`i', estimate `b' bootstrap garch's for it:
	forv b = 1/`rep_sims' {
		* now estimate GARCH for this
		cap arch y_b`b' l.y_b`b' x_`i' z_`i', arch(1) garch(1) het(x_`i' z_`i')
		
		if "`e(converged)'" == "1" {
			* store betas
			replace hetx = _b[HET:x_`i'] in `b'
			replace hetz = _b[HET:z_`i'] in `b'
			replace constant = _b[HET:_cons] in `b'
			replace archterm = _b[ARCH:L.arch] in `b'
			replace garchterm = _b[ARCH:L.garch] in `b'
		}
		else {
			* do nothing. No convergence
		}
	}
	
	* summarize and place means/sd
	qui su hetx
	mat storage[`i',3] = r(mean)
	mat storage[`i',8] = r(sd)
	
	qui su hetz
	mat storage[`i',4] = r(mean)
	mat storage[`i',9] = r(sd)
	
	qui su constant
	mat storage[`i',5] = r(mean)
	mat storage[`i',10] = r(sd)
	
	qui su archterm
	mat storage[`i',1] = r(mean)
	mat storage[`i',6] = r(sd)
	
	qui su garchterm
	mat storage[`i',2] = r(mean)
	mat storage[`i',7] = r(sd)	
	
	
	* now create preds
	replace preds = exp(constant + hetx*1 + hetz*1) // init guess
	forv j = 1/100 {
		replace preds = garchterm*preds + exp(constant + hetx*1 + hetz*1) + ///
			0*archterm // replace w/ past value to achieve stable GARCH prediction
	}
	* and get med and 95% CIs
	_pctile preds, p(2.5, 50, 97.5)
	mat storage[`i',12] = r(r1)
	mat storage[`i',11] = r(r2)
	mat storage[`i',13] = r(r3)
	su preds
	mat storage[`i',14] = r(sd)
	
} // close dgp_sims loop

mat list storage
clear
svmat storage
* 1. estimated B-bar, ARCH term
rename storage1 archterm_mean
* 2. estimated B-bar, GARCh term
rename storage2 garchterm_mean
* 3. estimated B-bar, Het X term
rename storage3 hetx_mean
* 4. estimated B-bar, Het Z term
rename storage4 hetz_mean
* 5. estimated B-bar, Het constant
rename storage5 constant_mean
* 6. estimated B-sd, ARCH term
rename storage6 archterm_sd
* 7. estimated B-sd, GARCh term
rename storage7 garchterm_sd
* 8. estimated B-sd, Het X term
rename storage8 hetx_sd
* 9. estimated B-sd, Het Z term
rename storage9 hetz_sd
* 10. estimated B-sd, Het constant
rename storage10 constant_sd
* 11. median prediction
rename storage11 pred_median_PBS
* 12. ll prediction
rename storage12 pred_ll_PBS
* 13. ul prediction
rename storage13 pred_ul_PBS
* 14. sd prediction
rename storage14 pred_sd
gen sims = _n
compress
save "data/dgp_scenario2_RESIDPRED.dta", replace	
	